################################################################################ 
#
# The distribution of hate speech and its implications for content moderation
# PSRM - Replication package
# Table D3
#
################################################################################ 

library(dplyr)
library(readr)
library(kableExtra)
library(tidyr)

# Clear the global environment, keeping only objects whose name starts with
# "wd" (wd_data and wd_res are used below) and the object named "setsave".
# NOTE(review): these are presumably created by a master script that sources
# this file -- confirm before running the script stand-alone.
rm(list = setdiff(ls(), ls(pattern = "^wd|^setsave$")))

# CH
## this code estimates metrics with a stratified estimator after having annotated the sample produced above
sample_annotated_ch <- readRDS(paste0(wd_data, "/twitter_classifier_validation/ch_sample_to_annotate_final_anon.rds"))
sample_annotations_pi <- readRDS(paste0(wd_data, "/twitter_classifier_validation/ch_sample_to_annotate_pi_anon.rds"))


names(sample_annotations_pi)

# The human labels are copied onto the sampling file by row position, so the
# two files must have the same number of rows. Fail loudly here instead of
# relying on `$<-`, which silently recycles on a base data.frame when the
# lengths happen to divide evenly.
# NOTE(review): row ORDER is also assumed to match; no shared key is visible
# in this script to verify that.
stopifnot(nrow(sample_annotated_ch) == nrow(sample_annotations_pi))

# Binary gold-standard label: 1 when the annotation flags hate speech, else 0
# (NA annotations stay NA).
sample_annotated_ch$hate_speech_human <- ifelse(sample_annotated_ch$is_hate_speech_s == 1, 1, 0) # & is.na(sample_annotated_ch$target_group_s) == F

insample <- sample_annotations_pi
insample$hate_speech_human <- sample_annotated_ch$hate_speech_human

# insample <- insample %>% filter(is.na(tweet_original) == F)
# Keep only rows where tweet_text_na is FALSE (rows with NA are dropped too).
insample <- insample %>% filter(tweet_text_na == FALSE)

hate_speech_truth <- "hate_speech_human" #this variable should be 0 for non hate and 1 for hate as per the gold standard "truth"
classifying_var <- "prob_hatespeech" #this should be the variable on the basis of which classification and stratification happened


threshold <- 0.85 #this should be the threshold used for classification (i.e. 0.85 in CH and 0.8 in US)

## define functions for stratified sampling estimators 
# Turn the four cells of a binary confusion matrix into summary metrics.
#
# Cell naming is tr<truth>_pred<prediction>:
#   tr0_pred0  truth 0, predicted 0
#   tr1_pred0  truth 1, predicted 0
#   tr0_pred1  truth 0, predicted 1
#   tr1_pred1  truth 1, predicted 1
#
# `accuracy` and `F1_weighted` are plain sums/products of the cells, so they
# are proportions only when the four cells add up to 1.
#
# Returns a data.frame with the columns precision, recall, f1, accuracy, MCC,
# Kappa, F1_unweighted, F1_weighted and BM (one row per element of the inputs).
# A 0/0 ratio is not special-cased: it propagates as NaN/NA. Only the case
# where both rates entering an F1 are exactly 0 is mapped to 0.
do_metrics <- function(tr0_pred0, tr1_pred0, tr0_pred1, tr1_pred1){
  # Short aliases, with class 1 as the "positive" class.
  tn <- tr0_pred0
  fn <- tr1_pred0
  fp <- tr0_pred1
  tp <- tr1_pred1

  # Harmonic mean of two rates; defined as 0 when both rates are 0.
  harmonic <- function(a, b) {
    ifelse(a == 0 & b == 0, 0, 2 / ((1 / a) + (1 / b)))
  }

  # Rates for the positive class ...
  precision <- tp / (tp + fp)
  recall <- tp / (tp + fn)
  # ... and the same two rates with the classes swapped.
  precision_neg <- tn / (tn + fn)
  recall_neg <- tn / (tn + fp)

  f1_pos <- harmonic(precision, recall)
  f1_neg <- harmonic(precision_neg, recall_neg)

  # Matthews correlation coefficient.
  mcc_num <- tp * tn - fn * fp
  mcc_den <- sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))

  # Kappa agreement statistic for a 2x2 table.
  kappa_num <- 2 * (tp * tn - fp * fn)
  kappa_den <- (tp + fp) * (fp + tn) + (tp + fn) * (tn + fn)

  data.frame(
    precision = precision,
    recall = recall,
    f1 = f1_pos,
    accuracy = tn + tp,
    MCC = mcc_num / mcc_den,
    Kappa = kappa_num / kappa_den,
    F1_unweighted = (f1_pos + f1_neg) / 2,
    # class-1 share times F1 plus class-0 share times the reversed F1
    F1_weighted = ((tp + fn) * f1_pos) + ((tn + fp) * f1_neg),
    # informedness: recall + recall of the negative class - 1
    BM = recall + recall_neg - 1
  )
}

# Estimate the four confusion-matrix shares from a sample, weighting each row
# by the inverse of a probability column, and pass them to do_metrics().
#
#   data       data frame with one row per labelled item
#   truth      name of the gold-standard column; 0 is the negative class and
#              any other non-missing value counts as class 1
#   pred       name of the score column; scores below `threshold` count as
#              predicted 0, all others as predicted 1
#   probs      name of the column whose inverse is the row weight (default
#              "Prob"); NULL gives every row weight 1 and raises a warning
#              (presumably this column holds the sampling probability --
#              confirm against the code that drew the sample)
#   threshold  classification cut-off applied to `pred`
#
# Rows with a missing truth value or a missing score are dropped before the
# weighted shares are computed. Returns whatever do_metrics() returns.
our_metrics <- function(data, truth, pred, probs="Prob", threshold){
  if (!is.null(probs)) {
    data$Prob <- unlist(data[, probs])
  } else {
    warning("No probabilities in data")
    data$Prob <- 1
  }
  data$truth <- unlist(data[, truth])
  data$var1 <- unlist(data[, pred])

  gold_negative <- data$truth == 0
  below_cutoff <- data$var1 < threshold

  # A row can only be placed in a cell when both comparisons are defined.
  usable <- !is.na(gold_negative) & !is.na(below_cutoff)
  gold_negative <- gold_negative[usable]
  below_cutoff <- below_cutoff[usable]
  inverse_prob <- 1 / data$Prob[usable]

  # Indicator vector per cell, named after the matching do_metrics() argument.
  cell_flags <- list(
    tr0_pred0 = gold_negative & below_cutoff,
    tr1_pred0 = !gold_negative & below_cutoff,
    tr0_pred1 = gold_negative & !below_cutoff,
    tr1_pred1 = !gold_negative & !below_cutoff
  )
  cell_shares <- lapply(cell_flags, weighted.mean, w = inverse_prob)

  do.call(do_metrics, cell_shares)
}

# EMLNP Classifier
# Metrics at the operating threshold set above.
our_metrics(insample, truth=hate_speech_truth, pred=classifying_var, threshold=threshold)



# Compute metrics for multiple thresholds
# Grid 0.26, 0.27, ..., 0.85 (the leading 0.25 is dropped by [-1]).
thresholds <- seq(0.25, 0.85, by = 0.01)[-1]
metrics_list <- lapply(seq_along(thresholds), function(i) {
  our_metrics(
    data = insample,
    truth = hate_speech_truth,
    pred = classifying_var,
    threshold = thresholds[[i]]
  )
})
# One row of metrics per threshold, with the threshold stored alongside.
metrics_df <- do.call(rbind, metrics_list)
metrics_df[["threshold"]] <- thresholds
# metrics_df is used further down to build Table D3.

# US 
## this code estimates metrics with a stratified estimator after having annotated the sample produced above
sample_annotated_us <- readRDS(paste0(wd_data, "/twitter_classifier_validation/us_sample_to_annotate_final_anon.rds"))
sample_annotations_pi_us <- readRDS(paste0(wd_data, "/twitter_classifier_validation/us_sample_to_annotate_pi_anon.rds"))




names(sample_annotations_pi_us)

# The human labels are copied onto the sampling file by row position, so the
# two files must have the same number of rows. Fail loudly here instead of
# relying on `$<-`, which silently recycles on a base data.frame when the
# lengths happen to divide evenly.
# NOTE(review): row ORDER is also assumed to match; no shared key is visible
# in this script to verify that.
stopifnot(nrow(sample_annotated_us) == nrow(sample_annotations_pi_us))

# Binary gold-standard label: 1 when the annotation flags hate speech, else 0
# (NA annotations stay NA).
sample_annotated_us$hate_speech_human <- ifelse(sample_annotated_us$is_hate_speech_s == 1, 1, 0) #  & is.na(sample_annotated_us$target_group_1_s) == F

# From here on `insample` holds the US sample (it replaces the CH one).
insample <- sample_annotations_pi_us
insample$hate_speech_human <- sample_annotated_us$hate_speech_human
hate_speech_truth <- "hate_speech_human" #this variable should be 0 for non hate and 1 for hate as per the gold standard "truth"
classifying_var <- "MAX_IDENTITY_BOTH_TOX" #this should be the variable on the basis of which classification and stratification happened
threshold <- 0.8 #this should be the threshold used for classification (i.e. 0.85 in CH and 0.8 in US)

## define functions for stratified sampling estimators 
# Turn the four cells of a binary confusion matrix into summary metrics.
# NOTE(review): do_metrics is already defined earlier in this script with the
# same formulas; this second definition overwrites it with equivalent code.
#
# Cell naming is tr<truth>_pred<prediction>:
#   tr0_pred0  truth 0, predicted 0
#   tr1_pred0  truth 1, predicted 0
#   tr0_pred1  truth 0, predicted 1
#   tr1_pred1  truth 1, predicted 1
#
# `accuracy` and `F1_weighted` are plain sums/products of the cells, so they
# are proportions only when the four cells add up to 1.
#
# Returns a data.frame with the columns precision, recall, f1, accuracy, MCC,
# Kappa, F1_unweighted, F1_weighted and BM (one row per element of the inputs).
# A 0/0 ratio is not special-cased: it propagates as NaN/NA. Only the case
# where both rates entering an F1 are exactly 0 is mapped to 0.
do_metrics <- function(tr0_pred0, tr1_pred0, tr0_pred1, tr1_pred1){
  # Short aliases, with class 1 as the "positive" class.
  tn <- tr0_pred0
  fn <- tr1_pred0
  fp <- tr0_pred1
  tp <- tr1_pred1

  # Harmonic mean of two rates; defined as 0 when both rates are 0.
  harmonic <- function(a, b) {
    ifelse(a == 0 & b == 0, 0, 2 / ((1 / a) + (1 / b)))
  }

  # Rates for the positive class ...
  precision <- tp / (tp + fp)
  recall <- tp / (tp + fn)
  # ... and the same two rates with the classes swapped.
  precision_neg <- tn / (tn + fn)
  recall_neg <- tn / (tn + fp)

  f1_pos <- harmonic(precision, recall)
  f1_neg <- harmonic(precision_neg, recall_neg)

  # Matthews correlation coefficient.
  mcc_num <- tp * tn - fn * fp
  mcc_den <- sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))

  # Kappa agreement statistic for a 2x2 table.
  kappa_num <- 2 * (tp * tn - fp * fn)
  kappa_den <- (tp + fp) * (fp + tn) + (tp + fn) * (tn + fn)

  data.frame(
    precision = precision,
    recall = recall,
    f1 = f1_pos,
    accuracy = tn + tp,
    MCC = mcc_num / mcc_den,
    Kappa = kappa_num / kappa_den,
    F1_unweighted = (f1_pos + f1_neg) / 2,
    # class-1 share times F1 plus class-0 share times the reversed F1
    F1_weighted = ((tp + fn) * f1_pos) + ((tn + fp) * f1_neg),
    # informedness: recall + recall of the negative class - 1
    BM = recall + recall_neg - 1
  )
}

# Estimate the four confusion-matrix shares from a sample, weighting each row
# by the inverse of a probability column, and pass them to do_metrics().
# NOTE(review): our_metrics is already defined earlier in this script with the
# same logic; this second definition overwrites it with equivalent code.
#
#   data       data frame with one row per labelled item
#   truth      name of the gold-standard column; 0 is the negative class and
#              any other non-missing value counts as class 1
#   pred       name of the score column; scores below `threshold` count as
#              predicted 0, all others as predicted 1
#   probs      name of the column whose inverse is the row weight (default
#              "Prob"); NULL gives every row weight 1 and raises a warning
#              (presumably this column holds the sampling probability --
#              confirm against the code that drew the sample)
#   threshold  classification cut-off applied to `pred`
#
# Rows with a missing truth value or a missing score are dropped before the
# weighted shares are computed. Returns whatever do_metrics() returns.
our_metrics <- function(data, truth, pred, probs="Prob", threshold){
  if (!is.null(probs)) {
    data$Prob <- unlist(data[, probs])
  } else {
    warning("No probabilities in data")
    data$Prob <- 1
  }
  data$truth <- unlist(data[, truth])
  data$var1 <- unlist(data[, pred])

  gold_negative <- data$truth == 0
  below_cutoff <- data$var1 < threshold

  # A row can only be placed in a cell when both comparisons are defined.
  usable <- !is.na(gold_negative) & !is.na(below_cutoff)
  gold_negative <- gold_negative[usable]
  below_cutoff <- below_cutoff[usable]
  inverse_prob <- 1 / data$Prob[usable]

  # Indicator vector per cell, named after the matching do_metrics() argument.
  cell_flags <- list(
    tr0_pred0 = gold_negative & below_cutoff,
    tr1_pred0 = !gold_negative & below_cutoff,
    tr0_pred1 = gold_negative & !below_cutoff,
    tr1_pred1 = !gold_negative & !below_cutoff
  )
  cell_shares <- lapply(cell_flags, weighted.mean, w = inverse_prob)

  do.call(do_metrics, cell_shares)
}

# Metrics for the US sample at the operating threshold set above.
our_metrics(insample, truth=hate_speech_truth, pred=classifying_var, threshold=threshold)


# Compute metrics for multiple thresholds
# Grid 0.26, 0.27, ..., 0.85 (the leading 0.25 is dropped by [-1]).
thresholds <- seq(0.25, 0.85, by = 0.01)[-1]
metrics_list <- lapply(seq_along(thresholds), function(i) {
  our_metrics(
    data = insample,
    truth = hate_speech_truth,
    pred = classifying_var,
    threshold = thresholds[[i]]
  )
})
# One row of metrics per threshold, with the threshold stored alongside.
metrics_df_us <- do.call(rbind, metrics_list)
metrics_df_us[["threshold"]] <- thresholds



# Make Table D.3

# Pick, from each threshold sweep, the row at the classifier's operating
# threshold (0.85 for the Swiss classifier, 0.80 for the US one). The threshold
# grids are built with seq() in floating point, so the match is done with
# near() (tolerance-based) rather than exact `==`.
metrics_df_ch_d3 <- metrics_df %>%
  filter(near(threshold, 0.85)) %>%
  mutate(group = "Swiss Classifier")
metrics_df_us_d3 <- metrics_df_us %>%
  filter(near(threshold, 0.80)) %>%
  mutate(group = "US Classifier")

# Exactly one row per classifier must match; otherwise the table below would
# silently lose a column or contain list-columns after pivot_wider().
stopifnot(nrow(metrics_df_ch_d3) == 1, nrow(metrics_df_us_d3) == 1)

table_d3 <- bind_rows(metrics_df_ch_d3, metrics_df_us_d3) 
table_d3 <- table_d3 %>%
  # 1) pivot everything except `group` into long form
  pivot_longer(
    cols      = -group,
    names_to  = "metrics",
    values_to = "value"
  ) %>%
  # 2) drop the `threshold` row
  filter(metrics != "threshold") %>%
  # 3) rename the metric keys to the labels shown in the table
  mutate(metrics = recode(metrics,
                          precision      = "Precision",
                          recall         = "Recall",
                          f1             = "F1-score",
                          accuracy       = "Accuracy",
                          MCC            = "MCC",
                          Kappa          = "Kappa",
                          F1_unweighted  = "Unweighted F1",
                          F1_weighted    = "Weighted F1",
                          BM             = "Youden’s J (BM)"
  )) %>%
  # 4) spread `group` back out to wide: one column per classifier
  pivot_wider(
    names_from  = group,
    values_from = value
  )

# CSV version of the table.
write_csv(table_d3, paste0(wd_res,"/tables/table_d3.csv"))
cat("\n====================\n")
cat("Saved Table D3")
cat("\n====================\n")

# LaTeX version of the same table.
latex_tbl <- table_d3 %>%
  select(metrics, `Swiss Classifier`, `US Classifier`) %>%  # enforce column order
  kable(
    format      = "latex",
    booktabs    = TRUE,
    linesep     = "",
    align       = c("l", "r", "r"),
    col.names   = c("Metric", "Swiss Classifier", "US Classifier"),
    caption     = "Performance metrics of baseline hate speech classifiers",
    label       = "tab:table_d3"
  ) %>%
  kable_styling(
    position      = "center",
    latex_options = c("hold_position")
  )

save_kable(latex_tbl, paste0(wd_res,"/tables/table_d3.tex"))
